### Set the working directory.
### NOTE(review): "---" looks like a redacted placeholder; every file read
### below uses a relative name and is resolved against this directory, so
### confirm the real path before running.
setwd("---")

library(openEBGM)
library(tidyr)
library(dplyr)
library(tibble)

### Frequency table ('$'-delimited). Column `newactivesub` holds the count that
### is thresholded below and column `X` the value that is kept (presumably the
### drug name, since it is later matched against `var1` -- confirm).
hpdrug_df <- read.csv(file = 'hypts_actsub_frequency.csv', header = TRUE, sep = "$")

### Keep entries with a count above 500. which() discards rows whose count is
### NA; plain logical indexing would turn those into NA entries of `top_drug`,
### and a later `%in% top_drug` test would then also match missing drug names.
top_drug <- hpdrug_df$X[which(hpdrug_df$newactivesub > 500)]

### Pulmonary MedDRA high-level terms (HLTs) used to filter `var2` further down.
### The original full list was read here and then immediately overwritten by
### the curated list, so that read had no effect. It is kept only as a
### commented-out alternative:
# pulm_ade_df <- read.csv(file = 'pulm_meddra_hypts.csv', header = TRUE, sep = "$")
# pulm_ade <- toupper(unique(pulm_ade_df$hlt))

### Curated list of 30 pulmonary HLTs (saved for PCA); tab-delimited, terms in
### column `x`.
pulm_ade_df <- read.csv(file = 'final_30_pulm_hlt.csv', header = TRUE, sep = "\t")
### Upper-case the terms: the event column they are compared with is
### upper-cased before the comparison, so mixed-case entries would never match.
### toupper() also coerces a factor column to character.
pulm_ade <- toupper(unique(pulm_ade_df$x))

### Report-level drug / event table ('$'-delimited); empty strings and "NA"
### are read as missing values.
dr_ade_df <- read.csv(file = 'hypts_hlt_df.csv', header = TRUE, sep = "$",
                      na.strings = c("", "NA"))

### Drop the first column and rename the remaining three to the names that
### openEBGM::processRaw() requires (id, var1, var2).
dr_ade <- dr_ade_df[, -1]
colnames(dr_ade) <- c('id', 'var1', 'var2')
length(unique(dr_ade$var1))
length(unique(dr_ade$var2))

### selected only top used drugs
dr_ade <- dr_ade[dr_ade$var1 %in% top_drug, ]
### Upper-case every column (all columns become character). The function is
### passed directly because the funs() wrapper is deprecated in dplyr.
dr_ade <- mutate_all(dr_ade, toupper)

### count id per ATC class
### Lookup table: first column is renamed to the join key `var1`, second to `atc`.
atc_class <- read.csv(file = "hypts_134_drugs.csv", header = TRUE)
colnames(atc_class) <- c('var1', 'atc')
### `dr_ade$var1` has been upper-cased, so normalise the lookup key the same
### way; otherwise any mixed-case name in the lookup file would fail to join.
atc_class$var1 <- toupper(atc_class$var1)
### Name the key explicitly instead of relying on the implicit
### "all shared columns" join (which also emits a message).
pid_atc <- left_join(dr_ade, atc_class, by = "var1")

### Build the drug-event table with openEBGM (N, E, RR and PRR per pair);
### no stratification, zero-count pairs excluded.
processed <- processRaw(dr_ade, stratify = FALSE, zeroes = FALSE)

### First screen: flag drug-event pairs whose RR is at least 5.
suspicious <- processed[processed[["RR"]] >= 5, ]
nrow(suspicious)
nrow(processed)
nrow(suspicious) / nrow(processed)
head(suspicious, n = 5)

### Per drug (var1): number of distinct flagged events (var2), largest first.
### The first 50 rows are printed; print() returns its input, so the full
### table is what gets assigned.
worst_drug <- suspicious %>%
  group_by(var1) %>%
  summarise(Unique_Elements = n_distinct(var2)) %>%
  arrange(desc(Unique_Elements)) %>%
  print(n = 50)
# write.table(worst_drug, 'suspicious_drugs_hlt_rr_5.csv', sep = "\t")

### Pulmonary subset: pairs whose event is in `pulm_ade` and whose RR is at
### least 2, ordered by drug and then by decreasing RR.
pulm_suspicious_ade <- processed %>%
  filter(var2 %in% pulm_ade & RR >= 2) %>%
  arrange(var1, desc(RR))
pulm_suspicious_ade
# write.table(pulm_suspicious_ade, 'suspicious_pulm_drugs_hlt.csv', sep = "\t")

### Per drug: how many distinct pulmonary events were flagged, largest first.
worst_pulm_drug <- pulm_suspicious_ade %>%
  group_by(var1) %>%
  summarise(Unique_Elements = n_distinct(var2)) %>%
  arrange(desc(Unique_Elements))
# write.table(worst_pulm_drug, 'suspicious_pulm_drugs_hlt_rr.csv', sep = "\t")

### Per pulmonary event: how many distinct drugs were flagged, largest first.
worst_pulm_ade <- pulm_suspicious_ade %>%
  group_by(var2) %>%
  summarise(Unique_Elements = n_distinct(var1)) %>%
  arrange(desc(Unique_Elements))
# write.table(worst_pulm_ade, 'suspicious_pulm_hlt_rr.csv', sep = "\t")

### data squash for hyperparameters
squash1 <- squashData(processed)

### AutoHyper: five starting points for the optimiser, one per row.
theta_init <- data.frame(alpha1 = c(0.2, 0.1, 0.3, 0.5, 0.2),
                         beta1  = c(0.1, 0.1, 0.5, 0.3, 0.2),
                         alpha2 = c(2,   10,  6,   12,  5),
                         beta2  = c(4,   10,  6,   12,  5),
                         p      = c(1/3, 0.2, 0.5, 0.8, 0.4))

hyper_estimates <- autoHyper(squash1, theta_init = theta_init, squashed = TRUE,
                             zeroes = FALSE, max_pts = 60000)
(theta_hat <- hyper_estimates$estimates)

### Pinned hyperparameters (presumably in the same order as the columns of
### `theta_init`: alpha1, beta1, alpha2, beta2, p -- confirm).
### They must be a plain numeric vector of length 5: Qn(), ebgm() and
### quantBisect() index theta_hat[1] .. theta_hat[5], which a length-1 list
### wrapping the vector cannot provide.
### NOTE(review): this assignment replaces the estimate computed just above;
### delete it to use the values fitted to the current data.
theta_hat <- c(0.006453914, 0.058069410, 2.948294844, 2.787243060, 0.093657482)

### Qn values for every drug-event pair, computed from the hyperparameters
### and the N / E columns; one value per row of `processed` is expected.
qn <- Qn(theta_hat, N = processed[["N"]], E = processed[["E"]])
head(qn)
identical(length(qn), nrow(processed))
summary(qn)

### EBGM score per pair, stored as a new column.
processed[["ebgm"]] <- ebgm(theta_hat,
                            N = processed[["N"]],
                            E = processed[["E"]],
                            qn = qn)
head(processed)
processed[order(processed[["N"]]), ]

### 5th and 95th percentile columns from quantBisect().
processed[["QUANT_05"]] <- quantBisect(5,
                                       theta_hat = theta_hat,
                                       N = processed[["N"]],
                                       E = processed[["E"]],
                                       qn = qn)
processed[["QUANT_95"]] <- quantBisect(95,
                                       theta_hat = theta_hat,
                                       N = processed[["N"]],
                                       E = processed[["E"]],
                                       qn = qn)
head(processed)
processed[order(-processed[["N"]]), ]

### Second screen: flag pairs whose 5th-percentile column is at least 1.
suspicious <- processed[processed[["QUANT_05"]] >= 1, ]
nrow(suspicious)
nrow(processed)
nrow(suspicious) / nrow(processed)

### Per drug (var1): number of distinct flagged events, largest first;
### the first 50 rows are printed.
worst_drug <- suspicious %>%
  group_by(var1) %>%
  summarise(Unique_Elements = n_distinct(var2)) %>%
  arrange(desc(Unique_Elements)) %>%
  print(n = 50)
# write.table(worst_drug, file = 'suspicious_drugs_hlt.csv', sep = "\t")

### Per event (var2): number of distinct flagged drugs, largest first;
### the first 50 rows are printed.
worst_ade <- suspicious %>%
  group_by(var2) %>%
  summarise(Unique_Elements = n_distinct(var1)) %>%
  arrange(desc(Unique_Elements)) %>%
  print(n = 50)
# write.table(worst_ade, 'suspicious_hlt.csv', sep = "\t")

### Pulmonary
### Pairs whose event is in `pulm_ade` and whose QUANT_05 is strictly above 1,
### ordered by drug and then by decreasing EBGM.
### NOTE(review): the all-event screen on `processed` uses QUANT_05 >= 1,
### this one uses > 1 -- confirm which cut-off is intended.
pulm_suspicious_ade <- processed %>%
  filter(var2 %in% pulm_ade & QUANT_05 > 1) %>%
  arrange(var1, desc(ebgm))
# write.table(pulm_suspicious_ade, 'suspicious_hypts_pulm_drug_hlt.csv', sep = "\t")

### Per drug: number of distinct flagged pulmonary events, largest first;
### the first 50 rows are printed.
worst_pulm_drug <- pulm_suspicious_ade %>%
  group_by(var1) %>%
  summarise(Unique_Elements = n_distinct(var2)) %>%
  arrange(desc(Unique_Elements)) %>%
  print(n = 50)
# write.table(worst_pulm_drug, 'suspicious_pulm_drugs_hlt.csv', sep = "\t")

### Per pulmonary event: number of distinct flagged drugs, largest first;
### the first 27 rows are printed.
worst_pulm_ade <- pulm_suspicious_ade %>%
  group_by(var2) %>%
  summarise(Unique_Elements = n_distinct(var1)) %>%
  arrange(desc(Unique_Elements)) %>%
  print(n = 27)
# write.table(worst_pulm_ade, 'suspicious_pulm_hlt.csv', sep = "\t")

### Number of flagged rows per drug; show the six most frequent.
tabbed <- table(suspicious$var1)
head(sort(tabbed, decreasing = TRUE))
# write.table(processed, 'all_hypts_EBGM_hlt.csv', sep = '$')

### All scored pairs whose event is one of the pulmonary HLTs.
pulm_processed <- filter(processed, var2 %in% pulm_ade)
# write.table(pulm_processed, 'all_hypts_pulm_EBGM_hlt.csv', sep = '$')  




